<%--
  Created by IntelliJ IDEA.
  User: WYX
  Date: 2024/4/12
  Time: 11:39
  To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" import="java.util.regex.*,java.util.*" %>
<html>
<head>
    <title>实验四：字词的频率</title>
    <link rel="stylesheet" href="../../css/table.css">
</head>
<body>
<form action="" method="post">
    <label for="textAreaId">输入句子</label><br>
    <textarea name="word" id="textAreaId" cols="30" rows="10"></textarea><br>
    <input type="submit" name="submit" value="提交"><br>
    <%
        // Word/character frequency counter.
        // Reads the submitted "word" parameter, splits it into tokens, and prints
        // the full token list followed by a table of distinct tokens with their
        // occurrence count and relative frequency.

        // Request body is decoded as UTF-8; set before the first getParameter call
        // so the encoding is applied when the parameters are parsed.
        request.setCharacterEncoding("utf-8");
        String word = request.getParameter("word");

        // "word" is null when the form has not been submitted (e.g. first GET):
        // in that case nothing is rendered, including no table markup.
        if (word != null) {
            // Insertion-ordered so table rows follow the order of first occurrence
            // instead of an unspecified hash order.
            Map<String, Integer> map = new LinkedHashMap<>();
            List<String> list = new ArrayList<>();

            // A token is one of:
            //   - a run of ASCII letters,
            //   - a single character in the range U+4E00..U+9FA5 (CJK ideographs),
            //   - an optionally negative integer or decimal number.
            String regex = "[a-zA-Z]+|[一-龥]|-?[0-9]+(\\.[0-9]+)?";
            Pattern pattern = Pattern.compile(regex);
            Matcher matcher = pattern.matcher(word);
            while (matcher.find()) {
                String token = matcher.group();
                list.add(token);
                // Single lookup: absent key counts as zero previous occurrences.
                Integer seen = map.get(token);
                map.put(token, seen == null ? 1 : seen + 1);
            }

            int total = list.size();
            out.print("出现了" + total + "个字词：<br>" + list + "<br>");
            out.print("出现了" + map.size() + "个不相同的字词：<br>");
            out.print("<table><tr><th>字词</th><th>次数</th><th>频率</th></tr>");
            // When total is 0 the map is empty, so the division below never runs.
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                out.print("<tr><td>" + entry.getKey() + "</td><td>"
                        + entry.getValue() + "</td><td>"
                        + String.format("%.4f", 1.0 * entry.getValue() / total)
                        + "</td></tr>");
            }
            // Closing tag lives inside the branch that opened the table, so no
            // unmatched closing tag is emitted when the form was not submitted.
            out.print("</table>");
        }
    %>
</form>
</body>
</html>